Lab Assignment Four: Multi-Layer Perceptron
Richmond Aisabor
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings("ignore")
pd.set_option('display.max_columns', 500)
df_original = pd.read_csv('acs2017_census_tract_data.csv')
df_original.head()
| TractId | State | County | TotalPop | Men | Women | Hispanic | White | Black | Native | Asian | Pacific | VotingAgeCitizen | Income | IncomeErr | IncomePerCap | IncomePerCapErr | Poverty | ChildPoverty | Professional | Service | Office | Construction | Production | Drive | Carpool | Transit | Walk | OtherTransp | WorkAtHome | MeanCommute | Employed | PrivateWork | PublicWork | SelfEmployed | FamilyWork | Unemployment | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1001020100 | Alabama | Autauga County | 1845 | 899 | 946 | 2.4 | 86.3 | 5.2 | 0.0 | 1.2 | 0.0 | 1407 | 67826.0 | 14560.0 | 33018.0 | 6294.0 | 10.7 | 20.8 | 38.5 | 15.6 | 22.8 | 10.8 | 12.4 | 94.2 | 3.3 | 0.0 | 0.5 | 0.0 | 2.1 | 24.5 | 881 | 74.2 | 21.2 | 4.5 | 0.0 | 4.6 |
| 1 | 1001020200 | Alabama | Autauga County | 2172 | 1167 | 1005 | 1.1 | 41.6 | 54.5 | 0.0 | 1.0 | 0.0 | 1652 | 41287.0 | 3819.0 | 18996.0 | 2453.0 | 22.4 | 35.8 | 30.5 | 24.9 | 22.9 | 6.3 | 15.4 | 90.5 | 9.1 | 0.0 | 0.0 | 0.5 | 0.0 | 22.2 | 852 | 75.9 | 15.0 | 9.0 | 0.0 | 3.4 |
| 2 | 1001020300 | Alabama | Autauga County | 3385 | 1533 | 1852 | 8.0 | 61.4 | 26.5 | 0.6 | 0.7 | 0.4 | 2480 | 46806.0 | 9496.0 | 21236.0 | 2562.0 | 14.7 | 21.1 | 27.9 | 19.4 | 33.3 | 9.9 | 9.6 | 88.3 | 8.4 | 0.0 | 1.0 | 0.8 | 1.5 | 23.1 | 1482 | 73.3 | 21.1 | 4.8 | 0.7 | 4.7 |
| 3 | 1001020400 | Alabama | Autauga County | 4267 | 2001 | 2266 | 9.6 | 80.3 | 7.1 | 0.5 | 0.2 | 0.0 | 3257 | 55895.0 | 4369.0 | 28068.0 | 3190.0 | 2.3 | 1.7 | 29.0 | 16.6 | 25.8 | 9.1 | 19.5 | 82.3 | 11.2 | 0.0 | 1.5 | 2.9 | 2.1 | 25.9 | 1849 | 75.8 | 19.7 | 4.5 | 0.0 | 6.1 |
| 4 | 1001020500 | Alabama | Autauga County | 9965 | 5054 | 4911 | 0.9 | 77.5 | 16.4 | 0.0 | 3.1 | 0.0 | 7229 | 68143.0 | 14424.0 | 36905.0 | 10706.0 | 12.2 | 17.9 | 48.8 | 13.8 | 20.5 | 3.5 | 13.4 | 86.9 | 11.2 | 0.0 | 0.8 | 0.3 | 0.7 | 21.0 | 4787 | 71.4 | 24.1 | 4.5 | 0.0 | 2.3 |
df = df_original.dropna()
# TractId does not tell us anything
df = df.drop('TractId',axis = 1)
df.describe()
| TotalPop | Men | Women | Hispanic | White | Black | Native | Asian | Pacific | VotingAgeCitizen | Income | IncomeErr | IncomePerCap | IncomePerCapErr | Poverty | ChildPoverty | Professional | Service | Office | Construction | Production | Drive | Carpool | Transit | Walk | OtherTransp | WorkAtHome | MeanCommute | Employed | PrivateWork | PublicWork | SelfEmployed | FamilyWork | Unemployment | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| count | 72718.000000 | 72718.000000 | 72718.000000 | 72718.000000 | 72718.000000 | 72718.000000 | 72718.000000 | 72718.000000 | 72718.000000 | 72718.000000 | 72718.000000 | 72718.000000 | 72718.000000 | 72718.000000 | 72718.000000 | 72718.000000 | 72718.000000 | 72718.000000 | 72718.000000 | 72718.000000 | 72718.000000 | 72718.000000 | 72718.000000 | 72718.000000 | 72718.000000 | 72718.000000 | 72718.000000 | 72718.000000 | 72718.000000 | 72718.000000 | 72718.000000 | 72718.000000 | 72718.000000 | 72718.000000 |
| mean | 4443.485121 | 2184.362647 | 2259.122473 | 17.282951 | 61.337143 | 13.254417 | 0.727776 | 4.752459 | 0.146082 | 3141.161982 | 61119.999326 | 9690.325642 | 30666.653222 | 4249.725969 | 16.046724 | 21.148476 | 35.553813 | 18.847948 | 23.413165 | 9.263044 | 12.922312 | 75.880830 | 9.324557 | 5.370585 | 2.925778 | 1.886076 | 4.612646 | 26.080334 | 2081.309139 | 79.511827 | 14.149495 | 6.167661 | 0.171231 | 7.224917 |
| std | 2190.183318 | 1099.954423 | 1124.604806 | 23.084428 | 30.628031 | 21.581269 | 4.505791 | 8.995573 | 1.015198 | 1512.610257 | 30511.062580 | 6119.407315 | 15844.127467 | 2991.009809 | 12.428643 | 18.572714 | 15.038790 | 7.969609 | 5.591354 | 5.943849 | 7.592511 | 14.960499 | 5.107717 | 11.626060 | 5.260623 | 2.485812 | 3.770733 | 7.095680 | 1120.109805 | 7.957350 | 7.164790 | 3.798703 | 0.451630 | 5.099419 |
| min | 58.000000 | 26.000000 | 27.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 40.000000 | 2692.000000 | 728.000000 | 1631.000000 | 351.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 4.200000 | 20.000000 | 17.500000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
| 25% | 2958.000000 | 1440.000000 | 1494.000000 | 2.600000 | 38.100000 | 0.800000 | 0.000000 | 0.200000 | 0.000000 | 2096.000000 | 40380.000000 | 5737.000000 | 20624.000000 | 2508.000000 | 6.900000 | 6.200000 | 24.700000 | 13.300000 | 19.700000 | 5.000000 | 7.200000 | 72.400000 | 5.800000 | 0.000000 | 0.400000 | 0.400000 | 2.000000 | 21.100000 | 1306.000000 | 75.300000 | 9.300000 | 3.500000 | 0.000000 | 3.900000 |
| 50% | 4137.000000 | 2024.000000 | 2102.000000 | 7.400000 | 70.500000 | 3.800000 | 0.000000 | 1.500000 | 0.000000 | 2926.000000 | 54413.000000 | 8268.000000 | 27249.000000 | 3404.000000 | 12.600000 | 16.300000 | 33.300000 | 17.700000 | 23.200000 | 8.400000 | 11.800000 | 79.900000 | 8.500000 | 1.000000 | 1.400000 | 1.200000 | 3.800000 | 25.400000 | 1915.000000 | 80.600000 | 13.000000 | 5.500000 | 0.000000 | 6.000000 |
| 75% | 5532.750000 | 2719.000000 | 2817.000000 | 21.100000 | 87.700000 | 14.500000 | 0.400000 | 5.000000 | 0.000000 | 3923.000000 | 74688.000000 | 11909.000000 | 36413.000000 | 4959.000000 | 21.800000 | 31.600000 | 44.800000 | 23.200000 | 26.900000 | 12.500000 | 17.500000 | 84.900000 | 11.900000 | 4.600000 | 3.300000 | 2.500000 | 6.300000 | 30.300000 | 2651.000000 | 85.000000 | 17.600000 | 8.000000 | 0.000000 | 9.000000 |
| max | 65528.000000 | 32266.000000 | 33262.000000 | 100.000000 | 100.000000 | 100.000000 | 99.400000 | 91.400000 | 71.900000 | 39389.000000 | 249750.000000 | 153365.000000 | 220253.000000 | 84414.000000 | 100.000000 | 100.000000 | 92.200000 | 70.900000 | 72.300000 | 68.100000 | 60.500000 | 100.000000 | 64.000000 | 90.800000 | 77.700000 | 53.400000 | 82.800000 | 73.900000 | 28945.000000 | 100.000000 | 80.700000 | 47.400000 | 22.300000 | 62.800000 |
# find the data type
print(df.info())
<class 'pandas.core.frame.DataFrame'> Int64Index: 72718 entries, 0 to 74000 Data columns (total 36 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 State 72718 non-null object 1 County 72718 non-null object 2 TotalPop 72718 non-null int64 3 Men 72718 non-null int64 4 Women 72718 non-null int64 5 Hispanic 72718 non-null float64 6 White 72718 non-null float64 7 Black 72718 non-null float64 8 Native 72718 non-null float64 9 Asian 72718 non-null float64 10 Pacific 72718 non-null float64 11 VotingAgeCitizen 72718 non-null int64 12 Income 72718 non-null float64 13 IncomeErr 72718 non-null float64 14 IncomePerCap 72718 non-null float64 15 IncomePerCapErr 72718 non-null float64 16 Poverty 72718 non-null float64 17 ChildPoverty 72718 non-null float64 18 Professional 72718 non-null float64 19 Service 72718 non-null float64 20 Office 72718 non-null float64 21 Construction 72718 non-null float64 22 Production 72718 non-null float64 23 Drive 72718 non-null float64 24 Carpool 72718 non-null float64 25 Transit 72718 non-null float64 26 Walk 72718 non-null float64 27 OtherTransp 72718 non-null float64 28 WorkAtHome 72718 non-null float64 29 MeanCommute 72718 non-null float64 30 Employed 72718 non-null int64 31 PrivateWork 72718 non-null float64 32 PublicWork 72718 non-null float64 33 SelfEmployed 72718 non-null float64 34 FamilyWork 72718 non-null float64 35 Unemployment 72718 non-null float64 dtypes: float64(29), int64(5), object(2) memory usage: 20.5+ MB None
from sklearn.preprocessing import LabelEncoder
# Label encode State and Country features
label_encoder = LabelEncoder()
df['County'] = label_encoder.fit_transform(df['County'])
df['State'] = label_encoder.fit_transform(df['State'])
df.head()
| State | County | TotalPop | Men | Women | Hispanic | White | Black | Native | Asian | Pacific | VotingAgeCitizen | Income | IncomeErr | IncomePerCap | IncomePerCapErr | Poverty | ChildPoverty | Professional | Service | Office | Construction | Production | Drive | Carpool | Transit | Walk | OtherTransp | WorkAtHome | MeanCommute | Employed | PrivateWork | PublicWork | SelfEmployed | FamilyWork | Unemployment | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | 89 | 1845 | 899 | 946 | 2.4 | 86.3 | 5.2 | 0.0 | 1.2 | 0.0 | 1407 | 67826.0 | 14560.0 | 33018.0 | 6294.0 | 10.7 | 20.8 | 38.5 | 15.6 | 22.8 | 10.8 | 12.4 | 94.2 | 3.3 | 0.0 | 0.5 | 0.0 | 2.1 | 24.5 | 881 | 74.2 | 21.2 | 4.5 | 0.0 | 4.6 |
| 1 | 0 | 89 | 2172 | 1167 | 1005 | 1.1 | 41.6 | 54.5 | 0.0 | 1.0 | 0.0 | 1652 | 41287.0 | 3819.0 | 18996.0 | 2453.0 | 22.4 | 35.8 | 30.5 | 24.9 | 22.9 | 6.3 | 15.4 | 90.5 | 9.1 | 0.0 | 0.0 | 0.5 | 0.0 | 22.2 | 852 | 75.9 | 15.0 | 9.0 | 0.0 | 3.4 |
| 2 | 0 | 89 | 3385 | 1533 | 1852 | 8.0 | 61.4 | 26.5 | 0.6 | 0.7 | 0.4 | 2480 | 46806.0 | 9496.0 | 21236.0 | 2562.0 | 14.7 | 21.1 | 27.9 | 19.4 | 33.3 | 9.9 | 9.6 | 88.3 | 8.4 | 0.0 | 1.0 | 0.8 | 1.5 | 23.1 | 1482 | 73.3 | 21.1 | 4.8 | 0.7 | 4.7 |
| 3 | 0 | 89 | 4267 | 2001 | 2266 | 9.6 | 80.3 | 7.1 | 0.5 | 0.2 | 0.0 | 3257 | 55895.0 | 4369.0 | 28068.0 | 3190.0 | 2.3 | 1.7 | 29.0 | 16.6 | 25.8 | 9.1 | 19.5 | 82.3 | 11.2 | 0.0 | 1.5 | 2.9 | 2.1 | 25.9 | 1849 | 75.8 | 19.7 | 4.5 | 0.0 | 6.1 |
| 4 | 0 | 89 | 9965 | 5054 | 4911 | 0.9 | 77.5 | 16.4 | 0.0 | 3.1 | 0.0 | 7229 | 68143.0 | 14424.0 | 36905.0 | 10706.0 | 12.2 | 17.9 | 48.8 | 13.8 | 20.5 | 3.5 | 13.4 | 86.9 | 11.2 | 0.0 | 0.8 | 0.3 | 0.7 | 21.0 | 4787 | 71.4 | 24.1 | 4.5 | 0.0 | 2.3 |
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px
fig = px.box(x=df['ChildPoverty'])
fig.update_xaxes(title_text='Child Povery (%)')
fig.show()
The box plot shows a normal distribution that is left skewed, since the median child povery rate is 16.3%. The minimum child povery rate is 0% and the maximum is 100%. At the first and second quartiles, the child poverty rate is 6.2% and 31.6%, respectively. This information helps classify the dataset even further and divide the quality values into 4 ranges. The census samples will be divided into four classes:
df['ChildPoverty'] = pd.qcut(df['ChildPoverty'], 4, labels=['low','medium','high','vhigh'])
# replace quality score to numrical indicator
df.ChildPoverty.replace(to_replace = ['low','medium', 'high', 'vhigh'],
value = {0: 'low', 1: 'medium', 2: 'high', 3: 'vhigh'}, inplace = True)
df.head()
| State | County | TotalPop | Men | Women | Hispanic | White | Black | Native | Asian | Pacific | VotingAgeCitizen | Income | IncomeErr | IncomePerCap | IncomePerCapErr | Poverty | ChildPoverty | Professional | Service | Office | Construction | Production | Drive | Carpool | Transit | Walk | OtherTransp | WorkAtHome | MeanCommute | Employed | PrivateWork | PublicWork | SelfEmployed | FamilyWork | Unemployment | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | 89 | 1845 | 899 | 946 | 2.4 | 86.3 | 5.2 | 0.0 | 1.2 | 0.0 | 1407 | 67826.0 | 14560.0 | 33018.0 | 6294.0 | 10.7 | 2 | 38.5 | 15.6 | 22.8 | 10.8 | 12.4 | 94.2 | 3.3 | 0.0 | 0.5 | 0.0 | 2.1 | 24.5 | 881 | 74.2 | 21.2 | 4.5 | 0.0 | 4.6 |
| 1 | 0 | 89 | 2172 | 1167 | 1005 | 1.1 | 41.6 | 54.5 | 0.0 | 1.0 | 0.0 | 1652 | 41287.0 | 3819.0 | 18996.0 | 2453.0 | 22.4 | 3 | 30.5 | 24.9 | 22.9 | 6.3 | 15.4 | 90.5 | 9.1 | 0.0 | 0.0 | 0.5 | 0.0 | 22.2 | 852 | 75.9 | 15.0 | 9.0 | 0.0 | 3.4 |
| 2 | 0 | 89 | 3385 | 1533 | 1852 | 8.0 | 61.4 | 26.5 | 0.6 | 0.7 | 0.4 | 2480 | 46806.0 | 9496.0 | 21236.0 | 2562.0 | 14.7 | 2 | 27.9 | 19.4 | 33.3 | 9.9 | 9.6 | 88.3 | 8.4 | 0.0 | 1.0 | 0.8 | 1.5 | 23.1 | 1482 | 73.3 | 21.1 | 4.8 | 0.7 | 4.7 |
| 3 | 0 | 89 | 4267 | 2001 | 2266 | 9.6 | 80.3 | 7.1 | 0.5 | 0.2 | 0.0 | 3257 | 55895.0 | 4369.0 | 28068.0 | 3190.0 | 2.3 | 0 | 29.0 | 16.6 | 25.8 | 9.1 | 19.5 | 82.3 | 11.2 | 0.0 | 1.5 | 2.9 | 2.1 | 25.9 | 1849 | 75.8 | 19.7 | 4.5 | 0.0 | 6.1 |
| 4 | 0 | 89 | 9965 | 5054 | 4911 | 0.9 | 77.5 | 16.4 | 0.0 | 3.1 | 0.0 | 7229 | 68143.0 | 14424.0 | 36905.0 | 10706.0 | 12.2 | 2 | 48.8 | 13.8 | 20.5 | 3.5 | 13.4 | 86.9 | 11.2 | 0.0 | 0.8 | 0.3 | 0.7 | 21.0 | 4787 | 71.4 | 24.1 | 4.5 | 0.0 | 2.3 |
plt.figure(figsize=(10,7))
sns.distplot(df.ChildPoverty)
plt.title('Distribution of ChildPoverty')
Text(0.5, 1.0, 'Distribution of ChildPoverty')
The child poverty distribution plot shows that each child poverty class has about the same number of observations, so the dataset is balanced. The method chosen to balance the dataset was using a quantization threshold to divide the child poverty data into four classes. The quanitzation threshold was derived by finding the dynamic ranges between each quartile of the child poverty variable.
The training dataset should be balanced to prevent adding a bias in favor of the class with the most instances to the model. The traiing dataset should also be balanced because there is equal interest in the classification performance of each class in the dataset. The advantage of using a test set that has a similar distribution as the training dataset is that it gives a more realisitic performance estimation of how the model will perform in production.
from sklearn.model_selection import train_test_split
import copy
# don't forget to normalize
from sklearn.preprocessing import StandardScaler
std_scaler = StandardScaler()
# create two data frames seperated by data and target
X = copy.deepcopy(df)
remove = ["ChildPoverty", "TractId"]
#categorical data
categories = ["State", "County"]
#normalize continuous features
X_normalized = X.copy()
#remove 'remove' columns then remove 'categories' columns
continuous = list(df[[col for col in list(X.columns) if col not in remove]])
continuous = [i for i in continuous if i not in categories]
X_normalized[continuous] = std_scaler.fit_transform(X_normalized[continuous])
y = df['ChildPoverty'].copy()
y = y.values
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.2, train_size=0.8, random_state=123)
print('Rows: %d, columns: %d' % (X_train.shape[0], X_train.shape[1]))
X_train_new, X_test_new = train_test_split(X_normalized,
test_size=0.2,
random_state=1)
X_train_new.head()
Rows: 58174, columns: 36
| State | County | TotalPop | Men | Women | Hispanic | White | Black | Native | Asian | Pacific | VotingAgeCitizen | Income | IncomeErr | IncomePerCap | IncomePerCapErr | Poverty | ChildPoverty | Professional | Service | Office | Construction | Production | Drive | Carpool | Transit | Walk | OtherTransp | WorkAtHome | MeanCommute | Employed | PrivateWork | PublicWork | SelfEmployed | FamilyWork | Unemployment | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 64327 | 44 | 769 | 0.302495 | 0.038763 | 0.551200 | 0.637536 | -1.855736 | 2.291150 | -0.161521 | -0.506081 | -0.143896 | -0.341242 | -1.089219 | -1.106704 | -0.959583 | -0.828062 | 1.331874 | 3 | -1.439874 | 2.352961 | -0.288513 | 0.696011 | 0.062916 | 0.362234 | -0.181013 | -0.066281 | -0.442114 | 0.086058 | -0.427677 | 0.016865 | 0.268450 | 0.413228 | -0.244181 | -0.386361 | -0.379143 | 1.524710 |
| 52250 | 35 | 1040 | 0.207982 | -0.189430 | 0.590325 | -0.627395 | 0.864014 | -0.405652 | -0.161521 | -0.528314 | -0.143896 | 0.344994 | -0.822461 | -0.261028 | -0.820919 | -0.396433 | 0.712333 | 3 | -1.253687 | 0.056722 | -0.270628 | -0.633103 | 3.118580 | -0.252723 | 3.480139 | -0.461947 | -0.556169 | -0.758742 | -1.011122 | -1.195149 | 0.005973 | 1.933845 | -1.514290 | -1.149785 | -0.379143 | -0.044107 |
| 45772 | 32 | 1241 | 1.374559 | 1.033357 | 1.666266 | -0.198531 | 0.410178 | -0.424186 | -0.094940 | 0.294318 | -0.143896 | 1.791510 | 1.950027 | 3.210737 | 4.292304 | 3.349151 | -0.816404 | 0 | 2.662878 | -1.850536 | -1.111217 | -1.289248 | -1.504429 | -4.537368 | -1.453606 | 5.352617 | 0.945564 | 1.011316 | 1.481779 | 0.735617 | 2.527172 | -0.428767 | -0.956000 | 2.746309 | -0.379143 | -0.965787 |
| 27546 | 17 | 628 | 0.763647 | 0.925170 | 0.582322 | -0.480108 | 0.279578 | 0.076251 | -0.161521 | 0.105335 | -0.143896 | 0.865951 | -0.014487 | -0.173601 | -0.136118 | -0.481354 | -0.357783 | 2 | -0.309456 | -0.583213 | 1.893443 | -0.262970 | 0.036574 | -0.292828 | 2.638270 | -0.375933 | -0.309049 | -0.316227 | -0.613319 | -1.237428 | 1.184437 | -1.157658 | 1.416730 | -0.202087 | -0.379143 | -0.652023 |
| 17745 | 9 | 1398 | -0.605654 | -0.693090 | -0.501622 | -0.003593 | 0.452623 | -0.553929 | -0.161521 | -0.317098 | -0.143896 | -0.519742 | 0.617157 | 1.670872 | 0.094505 | 0.291968 | -0.993416 | 1 | 0.129412 | -0.507927 | 0.337458 | 0.897902 | -0.674658 | 0.335497 | -0.063543 | -0.461947 | -0.556169 | 1.614745 | -0.082914 | -0.293185 | -0.430594 | -0.127157 | 0.062878 | 0.192788 | -0.379143 | -0.318650 |
import numpy as np
from scipy.special import expit
import pandas as pd
import sys
# start with a simple base classifier, which can't be fit or predicted
# it only has internal classes to be used by classes that will subclass it
class TwoLayerPerceptronBase(object):
def __init__(self, n_hidden=30,
C=0.0, epochs=500, eta=0.001, random_state=None):
np.random.seed(random_state)
self.n_hidden = n_hidden
self.l2_C = C
self.epochs = epochs
self.eta = eta
@staticmethod
def _encode_labels(y):
"""Encode labels into one-hot representation"""
onehot = pd.get_dummies(y).values.T
return onehot
def _initialize_weights(self):
"""Initialize weights Glorot and the normalization."""
init_bound = 4*np.sqrt(6. / (self.n_hidden + self.n_features_ + 1))
W1 = np.random.uniform(-init_bound, init_bound,(self.n_hidden, self.n_features_ + 1))
W1[:,:1] = 0
# reduce the final layer magnitude in order to balance the size of the gradients
# between
init_bound = 4*np.sqrt(6 / (self.n_output_ + self.n_hidden + 1))
W2 = np.random.uniform(-init_bound, init_bound,(self.n_output_, self.n_hidden + 1))
W2[:,:1] = 0
return W1, W2
@staticmethod
def _relu(Z):
return np.maximum(0,Z.copy())
@staticmethod
def _sigmoid(z):
"""Use scipy.special.expit to avoid overflow"""
# 1.0 / (1.0 + np.exp(-z))
return expit(z)
@staticmethod
def _add_bias_unit(X, how='column'):
"""Add bias unit (column or row of 1s) to array at index 0"""
if how == 'column':
ones = np.ones((X.shape[0], 1))
X_new = np.hstack((ones, X))
elif how == 'row':
ones = np.ones((1, X.shape[1]))
X_new = np.vstack((ones, X))
return X_new
@staticmethod
def _L2_reg(lambda_, W1, W2):
"""Compute L2-regularization cost"""
# only compute for non-bias terms
return (lambda_/2.0) * np.sqrt(np.mean(W1[:, 1:] ** 2) + np.mean(W2[:, 1:] ** 2))
def _cost(self,A3,Y_enc,W1,W2):
'''Get the objective function value'''
cost = -np.mean(np.nan_to_num((Y_enc*np.log(A3)+(1-Y_enc)*np.log(1-A3))))
L2_term = self._L2_reg(self.l2_C, W1, W2)
return cost + L2_term
def _feedforward(self, X, W1, W2):
"""Compute feedforward step
"""
# A1->W1->ReLu->A2->W2->Sigmoid
A1 = self._add_bias_unit(X.T, how='row')
Z1 = W1 @ A1
A2 = self._relu(Z1)
A2 = self._add_bias_unit(A2, how='row')
Z2 = W2 @ A2
A3 = self._sigmoid(Z2) # never use relu as last layer for classification (yuck!)
return A1, Z1, A2, Z2, A3
def _get_gradient(self, A1, A2, A3, Z1, Z2, Y_enc, W1, W2):
""" Compute gradient step using backpropagation.
"""
# vectorized backpropagation
V2 = (A3-Y_enc)
# V3[Z2<=0] = 0 # could can change to be relu back prop on this layer too!
# old update: V1 = A2*(1-A2)*(W2.T @ V2)
# the derivative of sigmoid was A2(1-A2), but now that is relu
# so we change it to:
Z1_with_bias = self._add_bias_unit(Z1,how='row')
V1 = (W2.T @ V2)
V1[Z1_with_bias<=0] = 0
# relu derivative only zeros out certain values! easy!
grad2 = V2 @ A2.T
grad1 = V1[1:,:] @ A1.T
# regularize weights that are not bias terms
grad1[:, 1:] += (W1[:, 1:] * self.l2_C)
grad2[:, 1:] += (W2[:, 1:] * self.l2_C)
return grad1, grad2
def predict(self, X):
"""Predict class labels"""
_, _, _, _, A3 = self._feedforward(X, self.W1, self.W2)
y_pred = np.argmax(A3, axis=0)
return y_pred
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt
%matplotlib inline
#use print_result function from class to
plt.style.use('ggplot')
def print_result(nn, X_train, y_train, X_test, y_test, title="", color="red"):
print("=================")
print(title,":")
yhat = nn.predict(X_train)
print('Resubstitution acc:',accuracy_score(y_train,yhat))
yhat = nn.predict(X_test)
print('Validation acc:',accuracy_score(y_test,yhat))
plt.figure(figsize=(12,5))
plt.subplot(1,2,1)
if hasattr(nn,'val_score_'):
plt.plot(range(len(nn.val_score_)), nn.val_score_, color=color,label=title)
plt.ylabel('Validation Accuracy')
else:
plt.plot(range(len(nn.score_)), nn.score_, color=color,label=title)
plt.ylabel('Resub Accuracy')
plt.xlabel('Epochs')
plt.tight_layout()
plt.legend(loc='best')
plt.grid(True)
#plot cost
#print(nn.cost_)
#plt.subplot(1,2,2)
#cost_avgs = [np.mean(x) for x in nn.cost_]
#plt.plot(range(len(cost_avgs)), cost_avgs, label='Avg Cost', color=color)
#plt.ylabel('Cost')
#plt.xlabel('Epochs')
#plt.tight_layout()
#plt.legend(loc='best')
#plt.grid(True)
from sklearn.metrics import accuracy_score
# just start with the vectorized version and minibatch
class TLPMiniBatch(TwoLayerPerceptronBase):
def __init__(self, alpha=0.0, decrease_const=0.0, shuffle=True,
minibatches=1, **kwds):
# need to add to the original initializer
self.alpha = alpha
self.decrease_const = decrease_const
self.shuffle = shuffle
self.minibatches = minibatches
# but keep other keywords
super().__init__(**kwds)
def fit(self, X, y, print_progress=False, encode=False):
""" Learn weights from training data. With mini-batch"""
X_data, y_data = X.copy(), y.copy()
Y_enc = self._encode_labels(y)
# init weights and setup matrices
self.n_features_ = X_data.shape[1]
self.n_output_ = Y_enc.shape[0]
self.W1, self.W2 = self._initialize_weights()
delta_W1_prev = np.zeros(self.W1.shape)
delta_W2_prev = np.zeros(self.W2.shape)
self.cost_ = []
self.score_ = []
# get starting acc
self.score_.append(accuracy_score(y_data,self.predict(X_data)))
for i in range(self.epochs):
# adaptive learning rate
self.eta /= (1 + self.decrease_const*i)
if print_progress>0 and (i+1)%print_progress==0:
sys.stderr.write('\rEpoch: %d/%d' % (i+1, self.epochs))
sys.stderr.flush()
if self.shuffle:
idx_shuffle = np.random.permutation(y_data.shape[0])
X_data, Y_enc, y_data = X_data.reindex([idx_shuffle]), Y_enc[:, idx_shuffle], y_data[idx_shuffle]
mini = np.array_split(range(y_data.shape[0]), self.minibatches)
mini_cost = []
for idx in mini:
# feedforward
A1, Z1, A2, Z2, A3 = self._feedforward(X_data.reindex([idx]),
self.W1,
self.W2)
cost = self._cost(A3,Y_enc[:, idx],self.W1,self.W2)
mini_cost.append(cost) # this appends cost of mini-batch only
# compute gradient via backpropagation
grad1, grad2 = self._get_gradient(A1=A1, A2=A2, A3=A3, Z1=Z1, Z2=Z2,
Y_enc=Y_enc[:, idx],
W1=self.W1,W2=self.W2)
# momentum calculations
delta_W1, delta_W2 = self.eta * grad1, self.eta * grad2
self.W1 -= (delta_W1 + (self.alpha * delta_W1_prev))
self.W2 -= (delta_W2 + (self.alpha * delta_W2_prev))
delta_W1_prev, delta_W2_prev = delta_W1, delta_W2
self.cost_.append(mini_cost)
self.score_.append(accuracy_score(y_data,self.predict(X_data)))
return self
vals = {'n_hidden':30,
'C':0.01, 'epochs':20, 'eta':0.001,
'alpha':0.001, 'decrease_const':1e-5, 'minibatches':50,
'shuffle':True,'random_state':1}
nn_relu = TLPMiniBatch(**vals) # same as previous parameter values
%time nn_relu.fit(X_train, y_train, print_progress=True)
print_result(nn_relu, X_train, y_train, X_test, y_test, title="ReLu",color="blue")
Epoch: 20/20
CPU times: user 14.4 s, sys: 1.14 s, total: 15.5 s Wall time: 9.41 s ================= ReLu : Resubstitution acc: 0.2522088905696703 Validation acc: 0.24456820682068206
vals = {'n_hidden':30,
'C':0.01, 'epochs':20, 'eta':0.001,
'alpha':0.001, 'decrease_const':1e-5, 'minibatches':50,
'shuffle':True,'random_state':1}
nn_relu1 = TLPMiniBatch(**vals) # same as previous parameter values
%time nn_relu1.fit(X_train_new, y_train, print_progress=True)
yhat = nn_relu1.predict(X_test_new)
print('Accuracy:',accuracy_score(y_test,yhat))
print_result(nn_relu1, X_train_new, y_train, X_test_new, y_test, title="ReLu",color="blue")
Epoch: 20/20
CPU times: user 15.4 s, sys: 1.13 s, total: 16.5 s Wall time: 9.9 s Accuracy: 0.24456820682068206 ================= ReLu : Resubstitution acc: 0.2522088905696703 Validation acc: 0.24456820682068206
from sklearn.preprocessing import OneHotEncoder
# creating instance of one-hot-encoder
#enc = OneHotEncoder(handle_unknown='ignore')
#enc_df = pd.DataFrame(enc.fit_transform(X_train[['County']]).toarray())
# generate binary values using get_dummies
dum_df = pd.get_dummies(X, columns=['County'], prefix=["County"] )
X_temp = X.drop('County',axis = 1)
temp = X_temp.merge(dum_df)
temp.head()
| State | TotalPop | Men | Women | Hispanic | White | Black | Native | Asian | Pacific | VotingAgeCitizen | Income | IncomeErr | IncomePerCap | IncomePerCapErr | Poverty | ChildPoverty | Professional | Service | Office | Construction | Production | Drive | Carpool | Transit | Walk | OtherTransp | WorkAtHome | MeanCommute | Employed | PrivateWork | PublicWork | SelfEmployed | FamilyWork | Unemployment | County_0 | County_1 | County_2 | County_3 | County_4 | County_5 | County_6 | County_7 | County_8 | County_9 | County_10 | County_11 | County_12 | County_13 | County_14 | County_15 | County_16 | County_17 | County_18 | County_19 | County_20 | County_21 | County_22 | County_23 | County_24 | County_25 | County_26 | County_27 | County_28 | County_29 | County_30 | County_31 | County_32 | County_33 | County_34 | County_35 | County_36 | County_37 | County_38 | County_39 | County_40 | County_41 | County_42 | County_43 | County_44 | County_45 | County_46 | County_47 | County_48 | County_49 | County_50 | County_51 | County_52 | County_53 | County_54 | County_55 | County_56 | County_57 | County_58 | County_59 | County_60 | County_61 | County_62 | County_63 | County_64 | County_65 | County_66 | County_67 | County_68 | County_69 | County_70 | County_71 | County_72 | County_73 | County_74 | County_75 | County_76 | County_77 | County_78 | County_79 | County_80 | County_81 | County_82 | County_83 | County_84 | County_85 | County_86 | County_87 | County_88 | County_89 | County_90 | County_91 | County_92 | County_93 | County_94 | County_95 | County_96 | County_97 | County_98 | County_99 | County_100 | County_101 | County_102 | County_103 | County_104 | County_105 | County_106 | County_107 | County_108 | County_109 | County_110 | County_111 | County_112 | County_113 | County_114 | County_115 | County_116 | County_117 | County_118 | County_119 | County_120 | County_121 | County_122 | County_123 | County_124 | County_125 | County_126 | County_127 | County_128 | County_129 | County_130 | County_131 | County_132 | County_133 | County_134 | County_135 | County_136 | County_137 | County_138 | County_139 | County_140 | County_141 | County_142 | County_143 | County_144 | County_145 | County_146 | County_147 | County_148 | County_149 | County_150 | County_151 | County_152 | County_153 | County_154 | County_155 | County_156 | County_157 | County_158 | County_159 | County_160 | County_161 | County_162 | County_163 | County_164 | County_165 | County_166 | County_167 | County_168 | County_169 | County_170 | County_171 | County_172 | County_173 | County_174 | County_175 | County_176 | County_177 | County_178 | County_179 | County_180 | County_181 | County_182 | County_183 | County_184 | County_185 | County_186 | County_187 | County_188 | County_189 | County_190 | County_191 | County_192 | County_193 | County_194 | County_195 | County_196 | County_197 | County_198 | County_199 | County_200 | County_201 | County_202 | County_203 | County_204 | County_205 | County_206 | County_207 | County_208 | County_209 | County_210 | County_211 | County_212 | County_213 | County_214 | ... | County_1704 | County_1705 | County_1706 | County_1707 | County_1708 | County_1709 | County_1710 | County_1711 | County_1712 | County_1713 | County_1714 | County_1715 | County_1716 | County_1717 | County_1718 | County_1719 | County_1720 | County_1721 | County_1722 | County_1723 | County_1724 | County_1725 | County_1726 | County_1727 | County_1728 | County_1729 | County_1730 | County_1731 | County_1732 | County_1733 | County_1734 | County_1735 | County_1736 | County_1737 | County_1738 | County_1739 | County_1740 | County_1741 | County_1742 | County_1743 | County_1744 | County_1745 | County_1746 | County_1747 | County_1748 | County_1749 | County_1750 | County_1751 | County_1752 | County_1753 | County_1754 | County_1755 | County_1756 | County_1757 | County_1758 | County_1759 | County_1760 | County_1761 | County_1762 | County_1763 | County_1764 | County_1765 | County_1766 | County_1767 | County_1768 | County_1769 | County_1770 | County_1771 | County_1772 | County_1773 | County_1774 | County_1775 | County_1776 | County_1777 | County_1778 | County_1779 | County_1780 | County_1781 | County_1782 | County_1783 | County_1784 | County_1785 | County_1786 | County_1787 | County_1788 | County_1789 | County_1790 | County_1791 | County_1792 | County_1793 | County_1794 | County_1795 | County_1796 | County_1797 | County_1798 | County_1799 | County_1800 | County_1801 | County_1802 | County_1803 | County_1804 | County_1805 | County_1806 | County_1807 | County_1808 | County_1809 | County_1810 | County_1811 | County_1812 | County_1813 | County_1814 | County_1815 | County_1816 | County_1817 | County_1818 | County_1819 | County_1820 | County_1821 | County_1822 | County_1823 | County_1824 | County_1825 | County_1826 | County_1827 | County_1828 | County_1829 | County_1830 | County_1831 | County_1832 | County_1833 | County_1834 | County_1835 | County_1836 | County_1837 | County_1838 | County_1839 | County_1840 | County_1841 | County_1842 | County_1843 | County_1844 | County_1845 | County_1846 | County_1847 | County_1848 | County_1849 | County_1850 | County_1851 | County_1852 | County_1853 | County_1854 | County_1855 | County_1856 | County_1857 | County_1858 | County_1859 | County_1860 | County_1861 | County_1862 | County_1863 | County_1864 | County_1865 | County_1866 | County_1867 | County_1868 | County_1869 | County_1870 | County_1871 | County_1872 | County_1873 | County_1874 | County_1875 | County_1876 | County_1877 | County_1878 | County_1879 | County_1880 | County_1881 | County_1882 | County_1883 | County_1884 | County_1885 | County_1886 | County_1887 | County_1888 | County_1889 | County_1890 | County_1891 | County_1892 | County_1893 | County_1894 | County_1895 | County_1896 | County_1897 | County_1898 | County_1899 | County_1900 | County_1901 | County_1902 | County_1903 | County_1904 | County_1905 | County_1906 | County_1907 | County_1908 | County_1909 | County_1910 | County_1911 | County_1912 | County_1913 | County_1914 | County_1915 | County_1916 | County_1917 | County_1918 | County_1919 | County_1920 | County_1921 | County_1922 | County_1923 | County_1924 | County_1925 | County_1926 | County_1927 | County_1928 | County_1929 | County_1930 | County_1931 | County_1932 | County_1933 | County_1934 | County_1935 | County_1936 | County_1937 | County_1938 | County_1939 | County_1940 | County_1941 | County_1942 | County_1943 | County_1944 | County_1945 | County_1946 | County_1947 | County_1948 | County_1949 | County_1950 | County_1951 | County_1952 | County_1953 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | 1845 | 899 | 946 | 2.4 | 86.3 | 5.2 | 0.0 | 1.2 | 0.0 | 1407 | 67826.0 | 14560.0 | 33018.0 | 6294.0 | 10.7 | 2 | 38.5 | 15.6 | 22.8 | 10.8 | 12.4 | 94.2 | 3.3 | 0.0 | 0.5 | 0.0 | 2.1 | 24.5 | 881 | 74.2 | 21.2 | 4.5 | 0.0 | 4.6 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 1 | 0 | 2172 | 1167 | 1005 | 1.1 | 41.6 | 54.5 | 0.0 | 1.0 | 0.0 | 1652 | 41287.0 | 3819.0 | 18996.0 | 2453.0 | 22.4 | 3 | 30.5 | 24.9 | 22.9 | 6.3 | 15.4 | 90.5 | 9.1 | 0.0 | 0.0 | 0.5 | 0.0 | 22.2 | 852 | 75.9 | 15.0 | 9.0 | 0.0 | 3.4 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 2 | 0 | 3385 | 1533 | 1852 | 8.0 | 61.4 | 26.5 | 0.6 | 0.7 | 0.4 | 2480 | 46806.0 | 9496.0 | 21236.0 | 2562.0 | 14.7 | 2 | 27.9 | 19.4 | 33.3 | 9.9 | 9.6 | 88.3 | 8.4 | 0.0 | 1.0 | 0.8 | 1.5 | 23.1 | 1482 | 73.3 | 21.1 | 4.8 | 0.7 | 4.7 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 3 | 0 | 4267 | 2001 | 2266 | 9.6 | 80.3 | 7.1 | 0.5 | 0.2 | 0.0 | 3257 | 55895.0 | 4369.0 | 28068.0 | 3190.0 | 2.3 | 0 | 29.0 | 16.6 | 25.8 | 9.1 | 19.5 | 82.3 | 11.2 | 0.0 | 1.5 | 2.9 | 2.1 | 25.9 | 1849 | 75.8 | 19.7 | 4.5 | 0.0 | 6.1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 4 | 0 | 9965 | 5054 | 4911 | 0.9 | 77.5 | 16.4 | 0.0 | 3.1 | 0.0 | 7229 | 68143.0 | 14424.0 | 36905.0 | 10706.0 | 12.2 | 2 | 48.8 | 13.8 | 20.5 | 3.5 | 13.4 | 86.9 | 11.2 | 0.0 | 0.8 | 0.3 | 0.7 | 21.0 | 4787 | 71.4 | 24.1 | 4.5 | 0.0 | 2.3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
5 rows × 1989 columns
#enc_df = pd.DataFrame(enc.fit_transform(X_train[['State']]).toarray())
# generate binary values using get_dummies
dum_df = pd.get_dummies(X, columns=["State"], prefix=["State"] )
X_temp = temp.drop('State',axis = 1)
X_temp = X_temp.merge(dum_df)
#normalize continuous features
X_normalized = X_temp.copy()
X_normalized[continuous] = std_scaler.fit_transform(X_normalized[continuous])
X_train_newest, X_test_newest = train_test_split(X_normalized,
test_size=0.2,
random_state=1)
X_train_newest.head()
| TotalPop | Men | Women | Hispanic | White | Black | Native | Asian | Pacific | VotingAgeCitizen | Income | IncomeErr | IncomePerCap | IncomePerCapErr | Poverty | ChildPoverty | Professional | Service | Office | Construction | Production | Drive | Carpool | Transit | Walk | OtherTransp | WorkAtHome | MeanCommute | Employed | PrivateWork | PublicWork | SelfEmployed | FamilyWork | Unemployment | County_0 | County_1 | County_2 | County_3 | County_4 | County_5 | County_6 | County_7 | County_8 | County_9 | County_10 | County_11 | County_12 | County_13 | County_14 | County_15 | County_16 | County_17 | County_18 | County_19 | County_20 | County_21 | County_22 | County_23 | County_24 | County_25 | County_26 | County_27 | County_28 | County_29 | County_30 | County_31 | County_32 | County_33 | County_34 | County_35 | County_36 | County_37 | County_38 | County_39 | County_40 | County_41 | County_42 | County_43 | County_44 | County_45 | County_46 | County_47 | County_48 | County_49 | County_50 | County_51 | County_52 | County_53 | County_54 | County_55 | County_56 | County_57 | County_58 | County_59 | County_60 | County_61 | County_62 | County_63 | County_64 | County_65 | County_66 | County_67 | County_68 | County_69 | County_70 | County_71 | County_72 | County_73 | County_74 | County_75 | County_76 | County_77 | County_78 | County_79 | County_80 | County_81 | County_82 | County_83 | County_84 | County_85 | County_86 | County_87 | County_88 | County_89 | County_90 | County_91 | County_92 | County_93 | County_94 | County_95 | County_96 | County_97 | County_98 | County_99 | County_100 | County_101 | County_102 | County_103 | County_104 | County_105 | County_106 | County_107 | County_108 | County_109 | County_110 | County_111 | County_112 | County_113 | County_114 | County_115 | County_116 | County_117 | County_118 | County_119 | County_120 | County_121 | County_122 | County_123 | County_124 | County_125 | County_126 | County_127 | County_128 | County_129 | County_130 | County_131 | County_132 | County_133 | County_134 | County_135 | County_136 | County_137 | County_138 | County_139 | County_140 | County_141 | County_142 | County_143 | County_144 | County_145 | County_146 | County_147 | County_148 | County_149 | County_150 | County_151 | County_152 | County_153 | County_154 | County_155 | County_156 | County_157 | County_158 | County_159 | County_160 | County_161 | County_162 | County_163 | County_164 | County_165 | County_166 | County_167 | County_168 | County_169 | County_170 | County_171 | County_172 | County_173 | County_174 | County_175 | County_176 | County_177 | County_178 | County_179 | County_180 | County_181 | County_182 | County_183 | County_184 | County_185 | County_186 | County_187 | County_188 | County_189 | County_190 | County_191 | County_192 | County_193 | County_194 | County_195 | County_196 | County_197 | County_198 | County_199 | County_200 | County_201 | County_202 | County_203 | County_204 | County_205 | County_206 | County_207 | County_208 | County_209 | County_210 | County_211 | County_212 | County_213 | County_214 | County_215 | ... | County_1757 | County_1758 | County_1759 | County_1760 | County_1761 | County_1762 | County_1763 | County_1764 | County_1765 | County_1766 | County_1767 | County_1768 | County_1769 | County_1770 | County_1771 | County_1772 | County_1773 | County_1774 | County_1775 | County_1776 | County_1777 | County_1778 | County_1779 | County_1780 | County_1781 | County_1782 | County_1783 | County_1784 | County_1785 | County_1786 | County_1787 | County_1788 | County_1789 | County_1790 | County_1791 | County_1792 | County_1793 | County_1794 | County_1795 | County_1796 | County_1797 | County_1798 | County_1799 | County_1800 | County_1801 | County_1802 | County_1803 | County_1804 | County_1805 | County_1806 | County_1807 | County_1808 | County_1809 | County_1810 | County_1811 | County_1812 | County_1813 | County_1814 | County_1815 | County_1816 | County_1817 | County_1818 | County_1819 | County_1820 | County_1821 | County_1822 | County_1823 | County_1824 | County_1825 | County_1826 | County_1827 | County_1828 | County_1829 | County_1830 | County_1831 | County_1832 | County_1833 | County_1834 | County_1835 | County_1836 | County_1837 | County_1838 | County_1839 | County_1840 | County_1841 | County_1842 | County_1843 | County_1844 | County_1845 | County_1846 | County_1847 | County_1848 | County_1849 | County_1850 | County_1851 | County_1852 | County_1853 | County_1854 | County_1855 | County_1856 | County_1857 | County_1858 | County_1859 | County_1860 | County_1861 | County_1862 | County_1863 | County_1864 | County_1865 | County_1866 | County_1867 | County_1868 | County_1869 | County_1870 | County_1871 | County_1872 | County_1873 | County_1874 | County_1875 | County_1876 | County_1877 | County_1878 | County_1879 | County_1880 | County_1881 | County_1882 | County_1883 | County_1884 | County_1885 | County_1886 | County_1887 | County_1888 | County_1889 | County_1890 | County_1891 | County_1892 | County_1893 | County_1894 | County_1895 | County_1896 | County_1897 | County_1898 | County_1899 | County_1900 | County_1901 | County_1902 | County_1903 | County_1904 | County_1905 | County_1906 | County_1907 | County_1908 | County_1909 | County_1910 | County_1911 | County_1912 | County_1913 | County_1914 | County_1915 | County_1916 | County_1917 | County_1918 | County_1919 | County_1920 | County_1921 | County_1922 | County_1923 | County_1924 | County_1925 | County_1926 | County_1927 | County_1928 | County_1929 | County_1930 | County_1931 | County_1932 | County_1933 | County_1934 | County_1935 | County_1936 | County_1937 | County_1938 | County_1939 | County_1940 | County_1941 | County_1942 | County_1943 | County_1944 | County_1945 | County_1946 | County_1947 | County_1948 | County_1949 | County_1950 | County_1951 | County_1952 | County_1953 | County | State_0 | State_1 | State_2 | State_3 | State_4 | State_5 | State_6 | State_7 | State_8 | State_9 | State_10 | State_11 | State_12 | State_13 | State_14 | State_15 | State_16 | State_17 | State_18 | State_19 | State_20 | State_21 | State_22 | State_23 | State_24 | State_25 | State_26 | State_27 | State_28 | State_29 | State_30 | State_31 | State_32 | State_33 | State_34 | State_35 | State_36 | State_37 | State_38 | State_39 | State_40 | State_41 | State_42 | State_43 | State_44 | State_45 | State_46 | State_47 | State_48 | State_49 | State_50 | State_51 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 63252 | 0.302495 | 0.038763 | 0.551200 | 0.637536 | -1.855736 | 2.291150 | -0.161521 | -0.506081 | -0.143896 | -0.341242 | -1.089219 | -1.106704 | -0.959583 | -0.828062 | 1.331874 | 3 | -1.439874 | 2.352961 | -0.288513 | 0.696011 | 0.062916 | 0.362234 | -0.181013 | -0.066281 | -0.442114 | 0.086058 | -0.427677 | 0.016865 | 0.268450 | 0.413228 | -0.244181 | -0.386361 | -0.379143 | 1.524710 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 769 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 51329 | 0.207982 | -0.189430 | 0.590325 | -0.627395 | 0.864014 | -0.405652 | -0.161521 | -0.528314 | -0.143896 | 0.344994 | -0.822461 | -0.261028 | -0.820919 | -0.396433 | 0.712333 | 3 | -1.253687 | 0.056722 | -0.270628 | -0.633103 | 3.118580 | -0.252723 | 3.480139 | -0.461947 | -0.556169 | -0.758742 | -1.011122 | -1.195149 | 0.005973 | 1.933845 | -1.514290 | -1.149785 | -0.379143 | -0.044107 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1040 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 44968 | 1.374559 | 1.033357 | 1.666266 | -0.198531 | 0.410178 | -0.424186 | -0.094940 | 0.294318 | -0.143896 | 1.791510 | 1.950027 | 3.210737 | 4.292304 | 3.349151 | -0.816404 | 0 | 2.662878 | -1.850536 | -1.111217 | -1.289248 | -1.504429 | -4.537368 | -1.453606 | 5.352617 | 0.945564 | 1.011316 | 1.481779 | 0.735617 | 2.527172 | -0.428767 | -0.956000 | 2.746309 | -0.379143 | -0.965787 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1241 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 27081 | 0.763647 | 0.925170 | 0.582322 | -0.480108 | 0.279578 | 0.076251 | -0.161521 | 0.105335 | -0.143896 | 0.865951 | -0.014487 | -0.173601 | -0.136118 | -0.481354 | -0.357783 | 2 | -0.309456 | -0.583213 | 1.893443 | -0.262970 | 0.036574 | -0.292828 | 2.638270 | -0.375933 | -0.309049 | -0.316227 | -0.613319 | -1.237428 | 1.184437 | -1.157658 | 1.416730 | -0.202087 | -0.379143 | -0.652023 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 628 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 17411 | -0.605654 | -0.693090 | -0.501622 | -0.003593 | 0.452623 | -0.553929 | -0.161521 | -0.317098 | -0.143896 | -0.519742 | 0.617157 | 1.670872 | 0.094505 | 0.291968 | -0.993416 | 1 | 0.129412 | -0.507927 | 0.337458 | 0.897902 | -0.674658 | 0.335497 | -0.063543 | -0.461947 | -0.556169 | 1.614745 | -0.082914 | -0.293185 | -0.430594 | -0.127157 | 0.062878 | 0.192788 | -0.379143 | -0.318650 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1398 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
5 rows × 2041 columns
vals = {'n_hidden':30,
'C':0.01, 'epochs':20, 'eta':0.001,
'alpha':0.001, 'decrease_const':1e-5, 'minibatches':50,
'shuffle':True,'random_state':1}
nn_relu2 = TLPMiniBatch(**vals) # same as previous parameter values
%time nn_relu2.fit(X_train_newest, y_train, print_progress=True)
print_result(nn_relu2, X_train_newest, y_train, X_test_newest, y_test, title="ReLu",color="blue")
Epoch: 20/20
CPU times: user 2min 8s, sys: 22.7 s, total: 2min 31s Wall time: 1min 43s ================= ReLu : Resubstitution acc: 0.2522088905696703 Validation acc: 0.24456820682068206
print_result(nn_relu, X_train, y_train, X_test, y_test,
title="Unedited Data Loss", color='red')
print_result(nn_relu1, X_train_new, y_train, X_test_new, y_test,
title="Normalized Data Loss", color='blue')
print_result(nn_relu2, X_train_newest, y_train, X_test_newest, y_test,
title="Normalized & One Hot Encoded Data Loss", color='green')
plt.show()
================= Unedited Data Loss : Resubstitution acc: 0.2522088905696703 Validation acc: 0.24456820682068206 ================= Normalized Data Loss : Resubstitution acc: 0.2522088905696703 Validation acc: 0.24456820682068206 ================= Normalized & One Hot Encoded Data Loss : Resubstitution acc: 0.2522088905696703 Validation acc: 0.24456820682068206
After experimenting with the three models, each show an accuracy score of about 0.25. The meaninful difference in the performance is the direction of data loss. For the normalized dataset, the performance seemed got better over the twenty epochs. On the normalized, one-hot encoded and unedited datasets, the performance got worse over the 20 epochs.
from sklearn.metrics import accuracy_score
# just start with the vectorized version and minibatch
class ThreeLayerPerceptronBase(object):
def __init__(self, alpha=0.0, decrease_const=0.0, shuffle=True,
minibatches=1, n_hidden=30,
C=0.0, epochs=500, eta=0.001, random_state=None):
# need to add to the original initializer
self.alpha = alpha
self.decrease_const = decrease_const
self.shuffle = shuffle
self.minibatches = minibatches
# but keep other keywords
np.random.seed(random_state)
self.n_hidden = n_hidden
self.l2_C = C
self.epochs = epochs
self.eta = eta
@staticmethod
def _encode_labels(y):
"""Encode labels into one-hot representation"""
onehot = pd.get_dummies(y).values.T
return onehot
def _initialize_weights(self):
"""Initialize weights Glorot and the normalization."""
init_bound = 4*np.sqrt(6. / (self.n_hidden + self.n_features_ + 1))
W1 = np.random.uniform(-init_bound, init_bound,(self.n_hidden, self.n_features_ + 1))
W1[:,:1] = 0
init_bound = 4*np.sqrt(6. / (2*self.n_hidden + 1))
W2 = np.random.uniform(-init_bound, init_bound,(self.n_hidden, self.n_hidden + 1))
W2[:,:1] = 0
# reduce the final layer magnitude in order to balance the size of the gradients
# between
init_bound = 4*np.sqrt(6 / (self.n_output_ + self.n_hidden + 1))
W3 = np.random.uniform(-init_bound, init_bound,(self.n_output_, self.n_hidden + 1))
W3[:,:1] = 0
return W1, W2, W3
@staticmethod
def _relu(Z):
return np.maximum(0,Z.copy())
@staticmethod
def _sigmoid(z):
"""Use scipy.special.expit to avoid overflow"""
# 1.0 / (1.0 + np.exp(-z))
return expit(z)
@staticmethod
def _add_bias_unit(X, how='column'):
"""Add bias unit (column or row of 1s) to array at index 0"""
if how == 'column':
ones = np.ones((X.shape[0], 1))
X_new = np.hstack((ones, X))
elif how == 'row':
ones = np.ones((1, X.shape[1]))
X_new = np.vstack((ones, X))
return X_new
@staticmethod
def _L2_reg(lambda_, W1, W2, W3):
"""Compute L2-regularization cost"""
# only compute for non-bias terms
return (lambda_/2.0) * np.sqrt(np.mean(W1[:, 1:] ** 2) + np.mean(W2[:, 1:] ** 2) * np.mean(W3[:, 1:] ** 2))
def _cost(self,A4,Y_enc,W1,W2,W3):
'''Get the objective function value'''
cost = -np.mean(np.nan_to_num((Y_enc*np.log(A4)+(1-Y_enc)*np.log(1-A4))))
L2_term = self._L2_reg(self.l2_C, W1, W2,W3)
return cost + L2_term
def _feedforward(self, X, W1, W2, W3):
"""Compute feedforward step
"""
# A1->W1->ReLu->A2->W2->Sigmoid
A1 = self._add_bias_unit(X.T, how='row')
Z1 = W1 @ A1
A2 = self._relu(Z1)
A2 = self._add_bias_unit(A2, how='row')
Z2 = W2 @ A2
A3 = self._relu(Z2)
A3 = self._add_bias_unit(A3, how='row')
Z3 = W3 @ A3
A4 = self._sigmoid(Z3) # never use relu as last layer for classification (yuck!)
return A1, Z1, A2, Z2, A3, Z3, A4
def _get_gradient(self, A1, A2, A3, A4, Z1, Z2, Z3, Y_enc, W1, W2, W3):
""" Compute gradient step using backpropagation.
"""
# vectorized backpropagation
V3 = (A4-Y_enc)
# V3[Z2<=0] = 0 # could can change to be relu back prop on this layer too!
# old update: V1 = A2*(1-A2)*(W2.T @ V2)
# the derivative of sigmoid was A2(1-A2), but now that is relu
# so we change it to:
Z1_with_bias = self._add_bias_unit(Z1,how='row')
Z2_with_bias = self._add_bias_unit(Z2,how='row')
V2 = (W3.T @ V3)
V2[Z1_with_bias<=0] = 0
# relu derivative only zeros out certain values! easy!
grad3 = V3 @ A3.T
grad2 = V2[1:,:] @ A2.T
grad1 = V2[1:,:] @ A1.T
# regularize weights that are not bias terms
grad1[:, 1:] += (W1[:, 1:] * self.l2_C)
grad2[:, 1:] += (W2[:, 1:] * self.l2_C)
grad3[:, 1:] += (W3[:, 1:] * self.l2_C)
return grad1, grad2, grad3
def predict(self, X):
"""Predict class labels"""
_, _, _, _, _, _, A4 = self._feedforward(X, self.W1, self.W2, self.W3 )
y_pred = np.argmax(A4, axis=0)
return y_pred
def fit(self, X, y, print_progress=False, encode=False):
""" Learn weights from training data. With mini-batch"""
X_data, y_data = X.copy(), y.copy()
Y_enc = self._encode_labels(y)
# init weights and setup matrices
self.n_features_ = X_data.shape[1]
self.n_output_ = Y_enc.shape[0]
self.W1, self.W2, self.W3 = self._initialize_weights()
delta_W1_prev = np.zeros(self.W1.shape)
delta_W2_prev = np.zeros(self.W2.shape)
delta_W3_prev = np.zeros(self.W3.shape)
self.cost_ = []
self.score_ = []
# get starting acc
self.score_.append(accuracy_score(y_data,self.predict(X_data)))
for i in range(self.epochs):
# adaptive learning rate
self.eta /= (1 + self.decrease_const*i)
if print_progress>0 and (i+1)%print_progress==0:
sys.stderr.write('\rEpoch: %d/%d' % (i+1, self.epochs))
sys.stderr.flush()
if self.shuffle:
idx_shuffle = np.random.permutation(y_data.shape[0])
X_data, Y_enc, y_data = X_data.reindex([idx_shuffle]), Y_enc[:, idx_shuffle], y_data[idx_shuffle]
mini = np.array_split(range(y_data.shape[0]), self.minibatches)
mini_cost = []
for idx in mini:
# feedforward
A1, Z1, A2, Z2, A3, Z3, A4 = self._feedforward(X_data.reindex([idx]),
self.W1,
self.W2,
self.W3)
cost = self._cost(A4,Y_enc[:, idx],self.W1,self.W2,self.W3)
mini_cost.append(cost) # this appends cost of mini-batch only
# compute gradient via backpropagation
grad1, grad2, grad3 = self._get_gradient(A1=A1, A2=A2, A3=A3, A4=A4, Z1=Z1, Z2=Z2, Z3=Z3,
Y_enc=Y_enc[:, idx],
W1=self.W1,W2=self.W2,W3=self.W3)
# momentum calculations
delta_W1, delta_W2, delta_W3 = self.eta * grad1, self.eta * grad2, self.eta * grad3
self.W1 -= (delta_W1 + (self.alpha * delta_W1_prev))
self.W2 -= (delta_W2 + (self.alpha * delta_W2_prev))
self.W3 -= (delta_W3 + (self.alpha * delta_W3_prev))
delta_W1_prev, delta_W2_prev, delta_W3_prev = delta_W1, delta_W2, delta_W3
self.cost_.append(mini_cost)
self.score_.append(accuracy_score(y_data,self.predict(X_data)))
return self
nn_3 = ThreeLayerPerceptronBase(**vals)
%time nn_3.fit(X_train_newest, y_train, print_progress=1)
print('Done!')
print_result(nn_3, X_train_newest, y_train, X_test_newest, y_test, title="3 Layer Perceptron")
plt.show()
Epoch: 20/20
CPU times: user 2min 13s, sys: 23 s, total: 2min 36s Wall time: 1min 44s Done! ================= 3 Layer Perceptron : Resubstitution acc: 0.2522088905696703 Validation acc: 0.24456820682068206
from sklearn.metrics import accuracy_score
# just start with the vectorized version and minibatch
class FourLayerPerceptronBase(object):
def __init__(self, alpha=0.0, decrease_const=0.0, shuffle=True,
minibatches=1, n_hidden=30,
C=0.0, epochs=500, eta=0.001, random_state=None):
# need to add to the original initializer
self.alpha = alpha
self.decrease_const = decrease_const
self.shuffle = shuffle
self.minibatches = minibatches
# but keep other keywords
np.random.seed(random_state)
self.n_hidden = n_hidden
self.l2_C = C
self.epochs = epochs
self.eta = eta
@staticmethod
def _encode_labels(y):
"""Encode labels into one-hot representation"""
onehot = pd.get_dummies(y).values.T
return onehot
def _initialize_weights(self):
"""Initialize weights Glorot and the normalization."""
init_bound = 4*np.sqrt(6. / (self.n_hidden + self.n_features_ + 1))
W1 = np.random.uniform(-init_bound, init_bound,(self.n_hidden, self.n_features_ + 1))
W1[:,:1] = 0
init_bound = 4*np.sqrt(6. / (2*self.n_hidden + 1))
W2 = np.random.uniform(-init_bound, init_bound,(self.n_hidden, self.n_hidden + 1))
W2[:,:1] = 0
init_bound = 4*np.sqrt(6. / (2*self.n_hidden + 1))
W3 = np.random.uniform(-init_bound, init_bound,(self.n_hidden, self.n_hidden + 1))
W3[:,:1] = 0
# reduce the final layer magnitude in order to balance the size of the gradients
# between
init_bound = 4*np.sqrt(6 / (self.n_output_ + self.n_hidden + 1))
W4 = np.random.uniform(-init_bound, init_bound,(self.n_output_, self.n_hidden + 1))
W4[:,:1] = 0
return W1, W2, W3, W4
@staticmethod
def _relu(Z):
return np.maximum(0,Z.copy())
@staticmethod
def _sigmoid(z):
"""Use scipy.special.expit to avoid overflow"""
# 1.0 / (1.0 + np.exp(-z))
return expit(z)
@staticmethod
def _add_bias_unit(X, how='column'):
"""Add bias unit (column or row of 1s) to array at index 0"""
if how == 'column':
ones = np.ones((X.shape[0], 1))
X_new = np.hstack((ones, X))
elif how == 'row':
ones = np.ones((1, X.shape[1]))
X_new = np.vstack((ones, X))
return X_new
@staticmethod
def _L2_reg(lambda_, W1, W2, W3, W4):
"""Compute L2-regularization cost"""
# only compute for non-bias terms
return (lambda_/2.0) * np.sqrt(np.mean(W1[:, 1:] ** 2) + np.mean(W2[:, 1:] ** 2) * np.mean(W3[:, 1:] ** 2) + np.sqrt(np.mean(W4[:, 1:] ** 2) )
def _cost(self,A5,Y_enc,W1,W2,W3,W4):
'''Get the objective function value'''
cost = -np.mean(np.nan_to_num((Y_enc*np.log(A5)+(1-Y_enc)*np.log(1-A5))))
L2_term = self._L2_reg(self.l2_C, W1, W2,W3,W4)
return cost + L2_term
def _feedforward(self, X, W1, W2, W3):
"""Compute feedforward step
"""
# A1->W1->ReLu->A2->W2->Sigmoid
A1 = self._add_bias_unit(X.T, how='row')
Z1 = W1 @ A1
A2 = self._relu(Z1)
A2 = self._add_bias_unit(A2, how='row')
Z2 = W2 @ A2
A3 = self._relu(Z2)
A3 = self._add_bias_unit(A3, how='row')
Z3 = W3 @ A3
A4 = self._relu(Z3)
A4 = self._add_bias_unit(A4, how='row')
Z4 = W4 @ A4
A4 = self._sigmoid(Z4) # never use relu as last layer for classification (yuck!)
return A1, Z1, A2, Z2, A3, Z3, A4, Z4, A5
def _get_gradient(self, A1, A2, A3, A4, A5 Z1, Z2, Z3, Z4 Y_enc, W1, W2, W3, W4):
""" Compute gradient step using backpropagation.
"""
# vectorized backpropagation
V4 = (A5-Y_enc)
# V3[Z2<=0] = 0 # could can change to be relu back prop on this layer too!
# old update: V1 = A2*(1-A2)*(W2.T @ V2)
# the derivative of sigmoid was A2(1-A2), but now that is relu
# so we change it to:
Z1_with_bias = self._add_bias_unit(Z1,how='row')
Z2_with_bias = self._add_bias_unit(Z2,how='row')
Z3_with_bias = self._add_bias_unit(Z3,how='row')
V3 = (W4.T @ V4)
V3[Z1_with_bias<=0] = 0
# relu derivative only zeros out certain values! easy!
grad4 = V4 @ A4.T
grad3 = V3[1:,:] @ A3.T
grad2 = V3[1:,:] @ A2.T
grad1 = V3[1:,:] @ A1.T
# regularize weights that are not bias terms
grad1[:, 1:] += (W1[:, 1:] * self.l2_C)
grad2[:, 1:] += (W2[:, 1:] * self.l2_C)
grad3[:, 1:] += (W3[:, 1:] * self.l2_C)
grad4[:, 1:] += (W4[:, 1:] * self.l2_C)
return grad1, grad2, grad3, grad4
def predict(self, X):
"""Predict class labels"""
_, _, _, _, _, _, _, _, A5 = self._feedforward(X, self.W1, self.W2, self.W3, self.W4 )
y_pred = np.argmax(A5, axis=0)
return y_pred
def fit(self, X, y, print_progress=False, encode=False):
""" Learn weights from training data. With mini-batch"""
X_data, y_data = X.copy(), y.copy()
Y_enc = self._encode_labels(y)
# init weights and setup matrices
self.n_features_ = X_data.shape[1]
self.n_output_ = Y_enc.shape[0]
self.W1, self.W2, self.W3, self.W4 = self._initialize_weights()
delta_W1_prev = np.zeros(self.W1.shape)
delta_W2_prev = np.zeros(self.W2.shape)
delta_W3_prev = np.zeros(self.W3.shape)
delta_W4_prev = np.zeros(self.W4.shape)
self.cost_ = []
self.score_ = []
# get starting acc
self.score_.append(accuracy_score(y_data,self.predict(X_data)))
for i in range(self.epochs):
# adaptive learning rate
self.eta /= (1 + self.decrease_const*i)
if print_progress>0 and (i+1)%print_progress==0:
sys.stderr.write('\rEpoch: %d/%d' % (i+1, self.epochs))
sys.stderr.flush()
if self.shuffle:
idx_shuffle = np.random.permutation(y_data.shape[0])
X_data, Y_enc, y_data = X_data.reindex([idx_shuffle]), Y_enc[:, idx_shuffle], y_data[idx_shuffle]
mini = np.array_split(range(y_data.shape[0]), self.minibatches)
mini_cost = []
for idx in mini:
# feedforward
A1, Z1, A2, Z2, A3, Z3, A4, Z4, A5 = self._feedforward(X_data.reindex([idx]),
self.W1,
self.W2,
self.W3,
self.W4)
cost = self._cost(A4,Y_enc[:, idx],self.W1,self.W2,self.W3, self.W4)
mini_cost.append(cost) # this appends cost of mini-batch only
# compute gradient via backpropagation
grad1, grad2, grad3 = self._get_gradient(A1=A1, A2=A2, A3=A3, A4=A4, A5=A5. Z1=Z1, Z2=Z2, Z3=Z3,Z4=Z4,
Y_enc=Y_enc[:, idx],
W1=self.W1,W2=self.W2,W3=self.W3,W4=self.W4)
# momentum calculations
delta_W1, delta_W2, delta_W3, delta_W4 = self.eta * grad1, self.eta * grad2, self.eta * grad3, elf.eta * grad4
self.W1 -= (delta_W1 + (self.alpha * delta_W1_prev))
self.W2 -= (delta_W2 + (self.alpha * delta_W2_prev))
self.W3 -= (delta_W3 + (self.alpha * delta_W3_prev))
self.W4 -= (delta_W4 + (self.alpha * delta_W4_prev))
delta_W1_prev, delta_W2_prev, delta_W3_prev, delta_W4_prev = delta_W1, delta_W2, delta_W3, delta_W4
self.cost_.append(mini_cost)
self.score_.append(accuracy_score(y_data,self.predict(X_data)))
return self
File "<ipython-input-21-acdba9900381>", line 76 def _cost(self,A5,Y_enc,W1,W2,W3,W4): ^ SyntaxError: invalid syntax
class FourLayerPerceptron(ThreeLayerPerceptronBase):
def __init__(self, alpha=0.0, decrease_const=0.0, shuffle=True,
minibatches=1, **kwds):
# need to add to the original initializer
self.alpha = alpha
self.decrease_const = decrease_const
self.shuffle = shuffle
self.minibatches = minibatches
# but keep other keywords
super().__init__(**kwds)
def _initialize_weights(self):
"""Initialize weights Glorot and the normalization."""
init_bound = 4*np.sqrt(6. / (self.n_hidden + self.n_features_ + 1))
W1 = np.random.uniform(-init_bound, init_bound,(self.n_hidden, self.n_features_ + 1))
W1[:,:1] = 0
init_bound = 4*np.sqrt(6. / (2*self.n_hidden + 1))
W2 = np.random.uniform(-init_bound, init_bound,(self.n_hidden, self.n_hidden + 1))
W2[:,:1] = 0
init_bound = 4*np.sqrt(6. / (2*self.n_hidden + 1))
W3 = np.random.uniform(-init_bound, init_bound,(self.n_hidden, self.n_hidden + 1))
W3[:,:1] = 0
init_bound = 4*np.sqrt(6. / (2*self.n_hidden + 1))
W4 = np.random.uniform(-init_bound, init_bound,(self.n_hidden, self.n_hidden + 1))
W4[:,:1] = 0
# reduce the final layer magnitude in order to balance the size of the gradients
# between
init_bound = 4*np.sqrt(6 / (self.n_output_ + self.n_hidden + 1))
W5 = np.random.uniform(-init_bound, init_bound,(self.n_output_, self.n_hidden + 1))
W5[:,:1] = 0
return W1, W2, W3, W4, W5
@staticmethod
def _L2_reg(lambda_, W1, W2, W3, W4, W5):
"""Compute L2-regularization cost"""
# only compute for non-bias terms
return (lambda_/2.0) * np.sqrt(np.mean(W1[:, 1:] ** 2) + np.mean(W2[:, 1:] ** 2) * np.mean(W3[:, 1:] ** 2) + np.sqrt(np.mean(W4[:, 1:] ** 2) + np.sqrt(np.mean(W5[:, 1:] ** 2) )
def _cost(self,A6,Y_enc,W1,W2,W3,W4,W5):
'''Get the objective function value'''
cost = -np.mean(np.nan_to_num((Y_enc*np.log(A6)+(1-Y_enc)*np.log(1-A6))))
L2_term = self._L2_reg(self.l2_C, W1, W2,W3, W4, W5)
return cost + L2_term
def _feedforward(self, X, W1, W2, W3, W4):
"""Compute feedforward step
"""
# A1->W1->ReLu->A2->W2->Sigmoid
A1 = self._add_bias_unit(X.T, how='row')
Z1 = W1 @ A1
A2 = self._relu(Z1)
A2 = self._add_bias_unit(A2, how='row')
Z2 = W2 @ A2
A3 = self._relu(Z2)
A3 = self._add_bias_unit(A3, how='row')
Z3 = W3 @ A3
A4 = self._relu(Z3)
A4 = self._add_bias_unit(A4, how='row')
Z4 = W4 @ A4
A5 = self._relu(Z4)
A5 = self._add_bias_unit(A5, how='row')
Z5 = W5 @ A5
A6 = self._sigmoid(Z5) # never use relu as last layer for classification (yuck!)
return A1, Z1, A2, Z2, A3, Z3, A4, Z4, A5, Z5, A6
def _get_gradient(self, A1, A2, A3, A4, A5, Z1, Z2, Z3, Z4, Z5, Y_enc, W1, W2, W3, W4, W5):
""" Compute gradient step using backpropagation.
"""
# vectorized backpropagation
V5 = (A6-Y_enc)
# V3[Z2<=0] = 0 # could can change to be relu back prop on this layer too!
# old update: V1 = A2*(1-A2)*(W2.T @ V2)
# the derivative of sigmoid was A2(1-A2), but now that is relu
# so we change it to:
Z1_with_bias = self._add_bias_unit(Z1,how='row')
Z2_with_bias = self._add_bias_unit(Z2,how='row')
Z3_with_bias = self._add_bias_unit(Z3,how='row')
Z4_with_bias = self._add_bias_unit(Z3,how='row')
V4 = (W5.T @ V5)
V4[Z4_with_bias<=0] = 0
# relu derivative only zeros out certain values! easy!
grad5 = V5 @ A5.T
grad4 = V3[1:,:] @ A4.T
grad3 = V3[1:,:] @ A3.T
grad2 = V3[1:,:] @ A2.T
grad1 = V3[1:,:] @ A1.T
# regularize weights that are not bias terms
grad1[:, 1:] += (W1[:, 1:] * self.l2_C)
grad2[:, 1:] += (W2[:, 1:] * self.l2_C)
grad3[:, 1:] += (W3[:, 1:] * self.l2_C)
grad4[:, 1:] += (W4[:, 1:] * self.l2_C)
grad5[:, 1:] += (W5[:, 1:] * self.l2_C)
return grad1, grad2, grad3, grad4, grad5
def predict(self, X):
"""Predict class labels"""
_, _, _, _, _, _, _, _, _, _, A6 = self._feedforward(X, self.W1, self.W2, self.W3, self.W4, self.W5 )
y_pred = np.argmax(A6, axis=0)
return y_pred
def fit(self, X, y, print_progress=False, encode=False):
""" Learn weights from training data. With mini-batch"""
X_data, y_data = X.copy(), y.copy()
Y_enc = self._encode_labels(y)
# init weights and setup matrices
self.n_features_ = X_data.shape[1]
self.n_output_ = Y_enc.shape[0]
self.W1, self.W2, self.W3 = self._initialize_weights()
delta_W1_prev = np.zeros(self.W1.shape)
delta_W2_prev = np.zeros(self.W2.shape)
delta_W3_prev = np.zeros(self.W3.shape)
delta_W4_prev = np.zeros(self.W4.shape)
delta_W5_prev = np.zeros(self.W5.shape)
self.cost_ = []
self.score_ = []
# get starting acc
self.score_.append(accuracy_score(y_data,self.predict(X_data)))
for i in range(self.epochs):
# adaptive learning rate
self.eta /= (1 + self.decrease_const*i)
if print_progress>0 and (i+1)%print_progress==0:
sys.stderr.write('\rEpoch: %d/%d' % (i+1, self.epochs))
sys.stderr.flush()
if self.shuffle:
idx_shuffle = np.random.permutation(y_data.shape[0])
X_data, Y_enc, y_data = X_data.reindex([idx_shuffle]), Y_enc[:, idx_shuffle], y_data[idx_shuffle]
mini = np.array_split(range(y_data.shape[0]), self.minibatches)
mini_cost = []
for idx in mini:
# feedforward
A1, Z1, A2, Z2, A3, Z3, A4, Z4, A5, Z5, A6 = self._feedforward(X_data.reindex([idx]),
self.W1,
self.W2,
self.W3,
self.W4,
self.W5)
cost = self._cost(A6,Y_enc[:, idx],self.W1,self.W2,self.W3, self.W4, self.W5)
mini_cost.append(cost) # this appends cost of mini-batch only
# compute gradient via backpropagation
grad1, grad2, grad3, grad = self._get_gradient(A1=A1, A2=A2, A3=A3, A4=A4, A5=A5, A6=A6 Z1=Z1, Z2=Z2, Z3=Z3,Z4=Z4,Z5=Z5
Y_enc=Y_enc[:, idx],
W1=self.W1,W2=self.W2,W3=self.W3,W4=self.W4, self.W5)
# momentum calculations
delta_W1, delta_W2, delta_W3, delta_W4, delta_W5 = self.eta * grad1, self.eta * grad2, self.eta * grad3, self.eta * grad4, self.eta * grad5
self.W1 -= (delta_W1 + (self.alpha * delta_W1_prev))
self.W2 -= (delta_W2 + (self.alpha * delta_W2_prev))
self.W3 -= (delta_W3 + (self.alpha * delta_W3_prev))
self.W4 -= (delta_W4 + (self.alpha * delta_W4_prev))
self.W5 -= (delta_W5 + (self.alpha * delta_W5_prev))
delta_W1_prev, delta_W2_prev, delta_W3_prev, delta_W4_prev, delta_W5_prev = delta_W1, delta_W2, delta_W3, delta_W4, delta_W5
self.cost_.append(mini_cost)
self.score_.append(accuracy_score(y_data,self.predict(X_data)))
return self
File "<ipython-input-22-042a1fbe6ee3>", line 45 def _cost(self,A6,Y_enc,W1,W2,W3,W4,W5): ^ SyntaxError: invalid syntax